Load Tidyverse Package

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Show the directory the R session started in.
getwd()
## [1] "C:/Users/kchz694/OneDrive - AZCollaboration/Desktop/R_DataAnalytics_May2022/IP_R Code"

## Set up working directory

# NOTE(review): this is an absolute, user-specific path, so the script only runs
# where this exact directory exists -- consider a project-relative path instead.
setwd("C:/Users/kchz694/OneDrive - AZCollaboration/From Desktop 8-5-20/RT R Scripts/R_scripts/TV_RMD")
# Print the working directory again to confirm the change.
getwd()
## [1] "C:/Users/kchz694/OneDrive - AZCollaboration/From Desktop 8-5-20/RT R Scripts/R_scripts/TV_RMD"

Read csv file from folder

## Rows: 21 Columns: 85
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (85): Day, gp1_1, gp1_2, gp1_3, gp1_4, gp1_5, gp1_6, gp2_1, gp2_2, gp2_3...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 x 85
##     Day gp1_1 gp1_2 gp1_3 gp1_4 gp1_5 gp1_6 gp2_1 gp2_2 gp2_3 gp2_4 gp2_5 gp2_6
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1    22  240.  106.  166.  111.  172.  147.  151.  241.  106.  111.  131.  178.
## 2    26  257.  125.  203.  140.  201.  217.  223.  243.  152.  191.  193.  230.
## 3    29  270.  231.  306.  204.  329.  281.  274.  306.  229.  206.  238.  257.
## 4    33  346.  306.  482.  268.  368.  423.  339.  474.  247.  220.  282.  283.
## 5    36  378.  364.  513.  273.  446.  465.  393.  550.  277.  306.  320.  341.
## 6    40  459.  374.  769.  316.  382.  676.  607.  645.  341.  308.  524.  505.
## # ... with 72 more variables: gp3_1 <dbl>, gp3_2 <dbl>, gp3_3 <dbl>,
## #   gp3_4 <dbl>, gp3_5 <dbl>, gp3_6 <dbl>, gp4_1 <dbl>, gp4_2 <dbl>,
## #   gp4_3 <dbl>, gp4_4 <dbl>, gp4_5 <dbl>, gp4_6 <dbl>, gp5_1 <dbl>,
## #   gp5_2 <dbl>, gp5_3 <dbl>, gp5_4 <dbl>, gp5_5 <dbl>, gp5_6 <dbl>,
## #   gp6_1 <dbl>, gp6_2 <dbl>, gp6_3 <dbl>, gp6_4 <dbl>, gp6_5 <dbl>,
## #   gp6_6 <dbl>, gp7_1 <dbl>, gp7_2 <dbl>, gp7_3 <dbl>, gp7_4 <dbl>,
## #   gp7_5 <dbl>, gp7_6 <dbl>, gp8_1 <dbl>, gp8_2 <dbl>, gp8_3 <dbl>, ...

Restructuring the Data

Reshape the data from wide to long format using pivot_longer(). The column names gp1_1, gp1_2, etc. go into a new variable, grp (which is split in the next step), and the tumor values go into tv.

# Reshape from wide to long: every column except Day is stacked into a
# (grp, tv) pair, where grp holds the former column name and tv its value.
# Selecting "everything but Day" instead of the positional range 2:85 picks the
# same columns here and keeps working if measurement columns are added or removed.
tv2 <- tv2 %>%
  pivot_longer(cols = -Day, names_to = "grp", values_to = "tv")
head(tv2)
## # A tibble: 6 x 3
##     Day grp      tv
##   <dbl> <chr> <dbl>
## 1    22 gp1_1  240.
## 2    22 gp1_2  106.
## 3    22 gp1_3  166.
## 4    22 gp1_4  111.
## 5    22 gp1_5  172.
## 6    22 gp1_6  147.

Now use separate() to split grp into grp and replicate (the replicate number). The split happens at the underscore (_), which is removed.

# Split labels such as "gp1_1" at the underscore: the part before it stays in
# grp, the part after it goes into the new replicate column.
tv2 <- separate(tv2, col = grp, into = c("grp", "replicate"), sep = "_")
head(tv2, n = 10)
## # A tibble: 10 x 4
##      Day grp   replicate    tv
##    <dbl> <chr> <chr>     <dbl>
##  1    22 gp1   1          240.
##  2    22 gp1   2          106.
##  3    22 gp1   3          166.
##  4    22 gp1   4          111.
##  5    22 gp1   5          172.
##  6    22 gp1   6          147.
##  7    22 gp2   1          151.
##  8    22 gp2   2          241.
##  9    22 gp2   3          106.
## 10    22 gp2   4          111.

Remove the NA values that came from the blank lines in the original data.

# Keep only the rows whose tv value is present (not NA).
tv2 <- filter(tv2, !is.na(tv))

head(tv2)
## # A tibble: 6 x 4
##     Day grp   replicate    tv
##   <dbl> <chr> <chr>     <dbl>
## 1    22 gp1   1          240.
## 2    22 gp1   2          106.
## 3    22 gp1   3          166.
## 4    22 gp1   4          111.
## 5    22 gp1   5          172.
## 6    22 gp1   6          147.

To show how paste0() works: it joins its arguments without putting a space between the two parts.

# Attach the prefix "gp" to each of 1..14; paste0() inserts no separator.
paste0("gp", seq_len(14))
##  [1] "gp1"  "gp2"  "gp3"  "gp4"  "gp5"  "gp6"  "gp7"  "gp8"  "gp9"  "gp10"
## [11] "gp11" "gp12" "gp13" "gp14"

grp needs to be converted to a factor because otherwise the group names appear in the legend in alphabetical order (e.g. gp1, gp10, gp11, gp2, etc.). To avoid this confusion, convert grp into a factor with explicitly ordered levels using factor(); paste0() joins "gp" to an ordered numerical vector to build those levels.

# Turn grp into a factor whose levels run gp1, gp2, ..., gp14 in numeric order.
tv2 <- mutate(tv2, grp = factor(grp, levels = paste0("gp", seq_len(14))))
head(tv2)
## # A tibble: 6 x 4
##     Day grp   replicate    tv
##   <dbl> <fct> <chr>     <dbl>
## 1    22 gp1   1          240.
## 2    22 gp1   2          106.
## 3    22 gp1   3          166.
## 4    22 gp1   4          111.
## 5    22 gp1   5          172.
## 6    22 gp1   6          147.

GGPlot summarisation (instead of manual):

Add the summarisation to geom_line(). Use stat_summary() to add the error bars and points. When no summary function is supplied, stat_summary() defaults to mean_se(), i.e. the mean ± SEM.

# Plot tv against Day, one colour per grp: a line through the group means,
# with error bars and points computed by stat_summary().
tv2 %>%
  ggplot(aes(x = Day, y = tv, color = grp)) +
  # Line connecting the mean tv of each group at each Day.
  geom_line(stat = "summary", fun = "mean") +
  # mean_se() gives the mean plus/minus one standard error; naming it
  # explicitly removes the "No summary function supplied" message.
  stat_summary(geom = "errorbar", fun.data = mean_se, width = 2.0) +
  # The point geom has no `width` parameter (ggplot2 warned and ignored it),
  # so it is no longer passed here.
  stat_summary(geom = "point", fun.data = mean_se) +
  # Strip grid lines and background; keep black axis lines.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Zoom the y axis without discarding data outside the range.
  coord_cartesian(ylim = c(0, 2000)) +
  scale_x_continuous(breaks = c(20, 40, 60, 80, 100, 120, 140)) +
  xlab("Days") +
  ylab("TV mm3")

All the code in one place, for use with plotly. Note: geom_line(stat="summary", fun="mean") needs to come after the stat_summary() error-bar and point layers in order for the lines to show in the graph.

# Build the summary chart as an object so it can be printed and then handed to
# plotly. The line layer is added after the error-bar and point layers.
rtplot <- tv2 %>%
  ggplot(aes(x = Day, y = tv, color = grp)) +
  # mean_se() gives the mean plus/minus one standard error; naming it
  # explicitly removes the "No summary function supplied" message.
  stat_summary(geom = "errorbar", fun.data = mean_se, width = 2.0) +
  # The point geom has no `width` parameter (ggplot2 warned and ignored it),
  # so it is no longer passed here.
  stat_summary(geom = "point", fun.data = mean_se) +
  # Line connecting the mean tv of each group at each Day.
  geom_line(stat = "summary", fun = "mean") +
  # Strip grid lines and background; keep black axis lines.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Zoom the y axis without discarding data outside the range.
  coord_cartesian(ylim = c(0, 2000)) +
  scale_x_continuous(breaks = c(20, 40, 60, 80, 100, 120, 140)) +
  xlab("Days after Tumor Cells Implant") +
  ylab("TV mm3")
# Display the static ggplot.
rtplot

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Convert the ggplot object into a plotly object and display it.
rtplot1 <- ggplotly(rtplot)
rtplot1